decompress linux kernel

We are now executing Linux code. Before studying the next stage of the code, let's learn something about the Linux boot protocol.

On the x86 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as well as the desire in the early days to have the kernel itself be a bootable image, the complicated PC memory model and due to changed expectations in the PC industry caused by the effective demise of real-mode DOS as a mainstream operating system.

The instructions located at address 0x1000000 are built from the source files shown below. Some of the symbols and macros they use are generated during the build; for example:


arch/x86/boot/compressed/piggy.S

/*
 * Build-generated wrapper that embeds the gzip-compressed kernel and
 * exports the size/offset symbols referenced by head_32.S.
 */
.section ".rodata.compressed","a",@progbits
/* Length of the compressed payload; pushed as the input_len argument
 * of decompress_kernel (3888088 == 0x3b53d8). */
.globl z_input_len
z_input_len = 3888088
/* Added to the output address after decompression to locate the end of
 * the relocation entries (8443556 == 0x80d6a4). */
.globl z_output_len
z_output_len = 8443556
/* Added to the load address to obtain the address the image is copied
 * to before in-place decompression. */
.globl z_extract_offset
z_extract_offset = 0x461000
/* Negated form, used as an lea displacement to get back from the
 * relocated base to the decompression output address. */
.globl z_extract_offset_negative
z_extract_offset_negative = -0x461000
/* input_data .. input_data_end bracket the raw bytes of the included
 * compressed image. */
.globl input_data, input_data_end
input_data:
.incbin "arch/x86/boot/compressed/vmlinux.bin.gz"
input_data_end:

include/asm/asm-offsets.h

 * DO NOT MODIFY.
 *
 * This file was generated by Kbuild
 *
 */

#define IA32_SIGCONTEXT_ax 44 /* offsetof(struct sigcontext, ax)        # */
#define IA32_SIGCONTEXT_bx 32 /* offsetof(struct sigcontext, bx)        # */
#define IA32_SIGCONTEXT_cx 40 /* offsetof(struct sigcontext, cx)        # */
#define IA32_SIGCONTEXT_dx 36 /* offsetof(struct sigcontext, dx)        # */
#define IA32_SIGCONTEXT_si 20 /* offsetof(struct sigcontext, si)        # */
#define IA32_SIGCONTEXT_di 16 /* offsetof(struct sigcontext, di)        # */
#define IA32_SIGCONTEXT_bp 24 /* offsetof(struct sigcontext, bp)        # */
#define IA32_SIGCONTEXT_sp 28 /* offsetof(struct sigcontext, sp)        # */
#define IA32_SIGCONTEXT_ip 56 /* offsetof(struct sigcontext, ip)        # */

#define CPUINFO_x86 0 /* offsetof(struct cpuinfo_x86, x86)      # */
#define CPUINFO_x86_vendor 1 /* offsetof(struct cpuinfo_x86, x86_vendor)        # */
#define CPUINFO_x86_model 2 /* offsetof(struct cpuinfo_x86, x86_model)  # */
#define CPUINFO_x86_mask 3 /* offsetof(struct cpuinfo_x86, x86_mask)    # */
#define CPUINFO_hard_math 6 /* offsetof(struct cpuinfo_x86, hard_math)  # */
#define CPUINFO_cpuid_level 20 /* offsetof(struct cpuinfo_x86, cpuid_level)     # */
#define CPUINFO_x86_capability 24 /* offsetof(struct cpuinfo_x86, x86_capability)       # */
#define CPUINFO_x86_vendor_id 60 /* offsetof(struct cpuinfo_x86, x86_vendor_id) # */

#define TI_task 0 /* offsetof(struct thread_info, task) # */
#define TI_exec_domain 4 /* offsetof(struct thread_info, exec_domain)   # */
#define TI_flags 8 /* offsetof(struct thread_info, flags)       # */
#define TI_status 12 /* offsetof(struct thread_info, status)    # */
#define TI_preempt_count 20 /* offsetof(struct thread_info, preempt_count)      # */
#define TI_addr_limit 24 /* offsetof(struct thread_info, addr_limit)    # */
#define TI_restart_block 28 /* offsetof(struct thread_info, restart_block)      # */
#define TI_sysenter_return 60 /* offsetof(struct thread_info, sysenter_return)  # */
#define TI_cpu 16 /* offsetof(struct thread_info, cpu)  # */

#define GDS_size 0 /* offsetof(struct desc_ptr, size)   # */
#define GDS_address 2 /* offsetof(struct desc_ptr, address)     # */

#define PT_EBX 0 /* offsetof(struct pt_regs, bx)        # */
#define PT_ECX 4 /* offsetof(struct pt_regs, cx)        # */
#define PT_EDX 8 /* offsetof(struct pt_regs, dx)        # */
#define PT_ESI 12 /* offsetof(struct pt_regs, si)       # */
#define PT_EDI 16 /* offsetof(struct pt_regs, di)       # */
#define PT_EBP 20 /* offsetof(struct pt_regs, bp)       # */
#define PT_EAX 24 /* offsetof(struct pt_regs, ax)       # */
#define PT_DS 28 /* offsetof(struct pt_regs, ds)        # */
#define PT_ES 32 /* offsetof(struct pt_regs, es)        # */
#define PT_FS 36 /* offsetof(struct pt_regs, fs)        # */
#define PT_GS 40 /* offsetof(struct pt_regs, gs)        # */
#define PT_ORIG_EAX 44 /* offsetof(struct pt_regs, orig_ax)     # */
#define PT_EIP 48 /* offsetof(struct pt_regs, ip)       # */
#define PT_CS 52 /* offsetof(struct pt_regs, cs)        # */
#define PT_EFLAGS 56 /* offsetof(struct pt_regs, flags) # */
#define PT_OLDESP 60 /* offsetof(struct pt_regs, sp)    # */
#define PT_OLDSS 64 /* offsetof(struct pt_regs, ss)     # */

#define EXEC_DOMAIN_handler 4 /* offsetof(struct exec_domain, handler)  # */
#define IA32_RT_SIGFRAME_sigcontext 164 /* offsetof(struct rt_sigframe, uc.uc_mcontext) # */

#define pbe_address 0 /* offsetof(struct pbe, address)  # */
#define pbe_orig_address 4 /* offsetof(struct pbe, orig_address)        # */
#define pbe_next 8 /* offsetof(struct pbe, next)        # */
#define TSS_sysenter_sp0 -8572 /* offsetof(struct tss_struct, x86_tss.sp0) - sizeof(struct tss_struct)  # */
#define PAGE_SIZE_asm 4096 /* PAGE_SIZE # */
#define PAGE_SHIFT_asm 12 /* PAGE_SHIFT # */
#define PTRS_PER_PTE 512 /* PTRS_PER_PTE        # */
#define PTRS_PER_PMD 512 /* PTRS_PER_PMD        # */
#define PTRS_PER_PGD 4 /* PTRS_PER_PGD  # */
#define crypto_tfm_ctx_offset 48 /* offsetof(struct crypto_tfm, __crt_ctx)      # */

#define BP_scratch 484 /* offsetof(struct boot_params, scratch) # */
#define BP_loadflags 529 /* offsetof(struct boot_params, hdr.loadflags) # */
#define BP_hardware_subarch 572 /* offsetof(struct boot_params, hdr.hardware_subarch)   # */
#define BP_version 518 /* offsetof(struct boot_params, hdr.version)     # */
#define BP_kernel_alignment 560 /* offsetof(struct boot_params, hdr.kernel_alignment)   # */

#endif

The memory layout of the compressed kernel image is defined in arch/x86/boot/compressed/vmlinux.lds.S


arch/x86/boot/compressed/vmlinux.lds.S

/*
 * Linker script for the compressed boot image.  It places the sections
 * in the order head code, compressed payload, decompressor text,
 * read-only data, data and bss, all starting at link address 0, and
 * exports boundary symbols (_bss, _ebss, ...) that head_32.S uses when
 * it copies the image and clears the bss.
 */
#include <asm-generic/vmlinux.lds.h>

OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)

#undef i386

#include <asm/page_types.h>

#ifdef CONFIG_X86_64
OUTPUT_ARCH(i386:x86-64)
ENTRY(startup_64)
#else
OUTPUT_ARCH(i386)
ENTRY(startup_32)
#endif

SECTIONS
{
        /* Be careful parts of head_64.S assume startup_32 is at
         * address 0.
         */
        . = 0;
        .head.text : {
                _head = . ;
                HEAD_TEXT
                _ehead = . ;
        }
        /* Compressed kernel payload emitted by piggy.S */
        .rodata.compressed : {
                *(.rodata.compressed)
        }
        .text : {
                _text = .;      /* Text */
                *(.text)
                *(.text.*)
                _etext = . ;
        }
        .rodata : {
                _rodata = . ;
                *(.rodata)       /* read-only data */
                *(.rodata.*)
                _erodata = . ;
        }
        .data : {
                _data = . ;
                *(.data)
                *(.data.*)
                _edata = . ;
        }
        . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
        /* boot_heap and boot_stack from head_32.S live here; _bss also
         * marks the end of the region copied before decompression.
         */
        .bss : {
                _bss = . ;
                *(.bss)
                *(.bss.*)
                *(COMMON)
                . = ALIGN(8);   /* For convenience during zeroing */
                _ebss = .;
        }
#ifdef CONFIG_X86_64
        . = ALIGN(PAGE_SIZE);
        .pgtable : {
                _pgtable = . ;
                *(.pgtable)
                _epgtable = . ;
        }
#endif
        _end = .;
}

arch/x86/boot/compressed/head_32.S:33

    __HEAD
ENTRY(startup_32)
    cld
    /*
     * Test KEEP_SEGMENTS flag to see if the bootloader is asking
     * us to not reload segments
     */
    testb    $(1<<6), BP_loadflags(%esi) -> 0x1000001:    testb  $0x40,0x211(%esi)
(gdb) info registers esi
esi            0x8b000    569344
(gdb) x/b 0x8b000+0x211
0x8b211:    0x81
    jnz    1f
(gdb) info registers eflags
eflags         0x200046    [ PF ZF ID ]

    cli
    movl    $__BOOT_DS, %eax            -> 0x100000b:    mov    $0x18,%eax
    movl    %eax, %ds
    movl    %eax, %es
    movl    %eax, %fs
    movl    %eax, %gs
    movl    %eax, %ss
1:

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at.  This can only be done
 * with a short local call on x86.  Nothing  else will tell us what
 * address we are running at.  The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) are used as the stack
 * for this calculation. Only 4 bytes are needed.
 */
    leal    (BP_scratch+4)(%esi), %esp  -> 0x100001a:    lea 0x1e8(%esi),%esp
(gdb) info registers esp
esp            0x8b1e8    0x8b1e8
    call    1f                          -> 0x1000020:    call   0x1000025
1:    popl    %ebp
(gdb) info registers ebp
ebp            0x1000025    0x1000025
    subl    $1b, %ebp                   -> 0x1000026:    sub    $0x25,%ebp
(gdb) info registers ebp
ebp            0x1000000    0x1000000

/*
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
    movl    %ebp, %ebx
    movl    BP_kernel_alignment(%esi), %eax -> 0x100002e:    mov    0x230(%esi),%eax
(gdb) info registers esi
esi            0x8b000    569344
(gdb) x/w 0x8b000+0x230
0x8b230:    0x01000000
    decl    %eax
    addl    %eax, %ebx
(gdb) info registers ebx
ebx            0x1ffffff    33554431
    notl    %eax
(gdb) info registers eax
eax            0xff000000    -16777216
    andl    %eax, %ebx
(gdb) info registers ebx
ebx            0x1000000    16777216
#else
    movl    $LOAD_PHYSICAL_ADDR, %ebx
#endif

    /* Target address to relocate to for decompression */
    addl    $z_extract_offset, %ebx       -> add    $0x461000,%ebx
(gdb) info registers ebx
ebx            0x1461000    21368832

    /* Set up the stack */
    leal    boot_stack_end(%ebx), %esp    -> 0x1000041:    lea    0x3be740(%ebx),%esp
(gdb) info registers esp
esp            0x181f740    0x181f740

    /* Zero EFLAGS */
    pushl    $0
    popfl

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.
 */
    pushl    %esi
    leal    (_bss-4)(%ebp), %esi          -> 0x100004b:    lea    0x3b973c(%ebp),%esi
(gdb) info registers esi
esi            0x13b973c    20682556
    leal    (_bss-4)(%ebx), %edi          -> 0x1000051:    lea    0x3b973c(%ebx),%edi
(gdb) info registers edi
edi            0x181a73c    25274172
    movl    $(_bss - startup_32), %ecx    -> 0x1000057:    mov    $0x3b9740,%ecx
    shrl    $2, %ecx
(gdb) info registers ecx
ecx            0xee5d0    976336
    std
    rep    movsl
    cld
    popl    %esi
(gdb) info registers esi
esi            0x8b000    569344
/*
 * Jump to the relocated address.
 */
    leal    relocated(%ebx), %eax         -> 0x1000064:    lea    0x3b5450(%ebx),%eax
    jmp    *%eax
ENDPROC(startup_32)

    .text
relocated:

/*
 * Clear BSS (stack is currently empty)
 */
    xorl    %eax, %eax
    leal    _bss(%ebx), %edi                      -> 0x1816452:    lea    0x3b9740(%ebx),%edi
(gdb) info registers edi
edi            0x181a740    25274176
    leal    _ebss(%ebx), %ecx                     -> 0x1816458:    lea    0x3be768(%ebx),%ecx
    subl    %edi, %ecx
(gdb) info registers ecx
ecx            0x5028    20520
    shrl    $2, %ecx
(gdb) info registers ecx
ecx            0x140a    5130
    rep    stosl                                -> 0x1816463:    rep stos %eax,%es:(%edi)

/*
 * Do the decompression, and jump to the new kernel..
 */
    leal    z_extract_offset_negative(%ebx), %ebp -> 0x1816465:    lea    -0x461000(%ebx),%ebp
                /* push arguments for decompress_kernel: */
(gdb) info registers ebp
ebp            0x1000000    0x1000000
    pushl    %ebp        /* output address */
    pushl    $z_input_len    /* input_len */        -> 0x181646c:    push   $0x3b53d8
    leal    input_data(%ebx), %eax                -> 0x1816471:    lea    0x6c(%ebx),%eax
(gdb) info registers eax
eax            0x146106c    21368940
    pushl    %eax        /* input_data */
    leal    boot_heap(%ebx), %eax                 -> 0x1816478:    lea    0x3b9740(%ebx),%eax
(gdb) info registers eax
eax            0x181a740    25274176
    pushl    %eax        /* heap area */
    pushl    %esi        /* real mode pointer */
    call    decompress_kernel                     -> 0x1816480:    call   0x1819250
    addl    $20, %esp
(gdb) info registers esp
esp            0x181f740    0x181f740

#if CONFIG_RELOCATABLE
/*
 * Find the address of the relocations.
 */
    leal    z_output_len(%ebp), %edi                      -> 0x1816488:    lea    0x80d6a4(%ebp),%edi
(gdb) info registers edi
edi            0x180d6a4    25220772

/*
 * Calculate the delta between where vmlinux was compiled to run
 * and where it was actually loaded.
 */
    movl    %ebp, %ebx
(gdb) info registers ebx
ebx            0x1000000    16777216
    subl    $LOAD_PHYSICAL_ADDR, %ebx                    -> 0x1816490:    sub    $0x1000000,%ebx
(gdb) info registers ebx
ebx            0x0    0
(gdb) info registers eflags
eflags         0x46    [ PF ZF ]
    jz    2f    /* Nothing to be done if loaded at compiled addr. */     -> 0x1816496:    je     0x18164aa
/*
 * Process relocations.
 */

1:    subl    $4, %edi
    movl    (%edi), %ecx
    testl    %ecx, %ecx
    jz    2f
    addl    %ebx, -__PAGE_OFFSET(%ebx, %ecx)
    jmp    1b
2:
#endif

/*
 * Jump to the decompressed kernel.
 */
    xorl    %ebx, %ebx
(gdb) info registers ebp
ebp            0x1000000    0x1000000
    jmp    *%ebp

/*
 * Stack and heap for uncompression
 */
    .bss
    .balign 4
boot_heap:
    .fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
    .fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:

The source code of the decompress_kernel routine is as follows.


arch/x86/boot/compressed/misc.c:304

/*
 * decompress_kernel() - C entry point of the boot-time decompressor.
 *
 * Called from the assembly after the "relocated:" label, which pushes the
 * arguments in reverse order: output address, z_input_len, input_data,
 * boot_heap and finally the real-mode pointer held in %esi.
 *
 * @rmode:      pointer saved in real_mode and then read as the boot
 *              parameters (hdr.loadflags, screen_info)
 * @heap:       start of the scratch heap; BOOT_HEAP_SIZE bytes starting
 *              here are handed to the allocator
 * @input_data: start of the compressed kernel image
 * @input_len:  length of the compressed image in bytes
 * @output:     address the kernel is decompressed to
 *
 * Sanity-checks the addresses, decompresses the image to @output, runs
 * parse_elf() on the result and returns to the caller.  Failures are
 * reported through error(), which is defined elsewhere (presumably it
 * does not return - confirm).
 */
asmlinkage void decompress_kernel(void *rmode, memptr heap,
                                  unsigned char *input_data,
                                  unsigned long input_len,
                                  unsigned char *output)
{
        real_mode = rmode;

        /* Suppress the progress messages below when QUIET_FLAG is set */
        if (real_mode->hdr.loadflags & QUIET_FLAG)
                quiet = 1;

        /*
         * Select the text buffer address and video I/O port from the saved
         * video mode (mode 7 is presumably monochrome text - confirm).
         */
        if (real_mode->screen_info.orig_video_mode == 7) {
                vidmem = (char *) 0xb0000;
                vidport = 0x3b4;
        } else {
                vidmem = (char *) 0xb8000;
                vidport = 0x3d4;
        }

        lines = real_mode->screen_info.orig_video_lines;
        cols = real_mode->screen_info.orig_video_cols;

        free_mem_ptr     = heap;        /* Heap */
        free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

        /* The output address must be a multiple of MIN_KERNEL_ALIGN */
        if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
                error("Destination address inappropriately aligned");
        /*
         * NOTE(review): the upper-bound checks below test `heap`, not
         * `output`, although the message talks about the destination.
         */
#ifdef CONFIG_X86_64
        if (heap > 0x3fffffffffffUL)
                error("Destination address too large");
#else
        if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
                error("Destination address too large");
#endif
#ifndef CONFIG_RELOCATABLE
        /* A non-relocatable kernel may only be placed at LOAD_PHYSICAL_ADDR */
        if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
                error("Wrong destination address");
#endif

        if (!quiet)
                putstr("\nDecompressing Linux... ");
        /* error is passed as the decompressor's failure callback */
        decompress(input_data, input_len, NULL, NULL, output, NULL, error);
        /* presumably moves the ELF segments into their final place - confirm */
        parse_elf(output);
        if (!quiet)
                putstr("done.\nBooting the kernel.\n");
        return;
}

Links

results matching ""

    No results matching ""